made by Ashraf Salih

In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [8]:
import plotly as pt

Data import

In [8]:
fuel = pd.read_csv('datasets/fuel.csv')
In [9]:
fuel
Out[9]:
Horse Power Fuel Economy (MPG)
0 118.770799 29.344195
1 176.326567 24.695934
2 219.262465 23.952010
3 187.310009 23.384546
4 218.594340 23.426739
... ... ...
95 162.810542 27.418661
96 266.869640 15.989945
97 243.831211 19.253375
98 140.959803 29.515593
99 184.123053 25.196097

100 rows × 2 columns

In [10]:
fuel.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Horse Power         100 non-null    float64
 1   Fuel Economy (MPG)  100 non-null    float64
dtypes: float64(2)
memory usage: 1.7 KB
In [11]:
fuel.describe()
Out[11]:
Horse Power Fuel Economy (MPG)
count 100.000000 100.000000
mean 213.676190 23.178501
std 62.061726 4.701666
min 50.000000 10.000000
25% 174.996514 20.439516
50% 218.928402 23.143192
75% 251.706476 26.089933
max 350.000000 35.000000
In [11]:
import plotly.express as px

Visualitation

In [12]:
px.scatter(fuel,x='Horse Power',y='Fuel Economy (MPG)')
In [12]:
sns.scatterplot(data = fuel, x = 'Horse Power', y = 'Fuel Economy (MPG)') 
plt.show()
No description has been provided for this image
In [13]:
sns.regplot(x='Horse Power', y='Fuel Economy (MPG)', data=fuel)
plt.show()
No description has been provided for this image
In [13]:
px.histogram(fuel,x='Fuel Economy (MPG)')
In [14]:
px.histogram(fuel,x='Horse Power')

Splitting Data

In [16]:
x =fuel[['Horse Power']]
y=fuel[['Fuel Economy (MPG)']]
In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

Training

In [22]:
from sklearn.linear_model import LinearRegression
In [25]:
reg= LinearRegression()
In [26]:
reg.fit(X_train,y_train)
Out[26]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [29]:
reg.coef_
Out[29]:
array([[-0.07203114]])
In [30]:
reg.intercept_
Out[30]:
array([38.57636334])
In [32]:
reg.predict([[300]])
C:\Users\Ashra\anaconda3\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but LinearRegression was fitted with feature names

Out[32]:
array([[16.96702159]])
In [33]:
y_pred=reg.predict(X_test)
y_pred
Out[33]:
array([[31.98981696],
       [19.35343919],
       [28.61699715],
       [29.89774657],
       [28.46086385],
       [26.14064374],
       [24.76692693],
       [24.2315321 ],
       [16.06719351],
       [18.0526321 ],
       [22.50047897],
       [22.41674787],
       [20.88842194],
       [28.93693583],
       [21.01969214],
       [28.42286818],
       [19.71485757],
       [25.87535982],
       [21.97587415],
       [22.78263821],
       [20.86963943],
       [22.17700037],
       [29.02002311],
       [32.48454639],
       [25.94578138],
       [15.45787143],
       [25.9003797 ],
       [17.36852921],
       [19.97011895],
       [15.82766917]])
In [34]:
y_test.values
Out[34]:
array([[31.80670649],
       [15.98994481],
       [27.47341504],
       [29.67863744],
       [28.88208128],
       [27.38701207],
       [23.55672887],
       [23.01119391],
       [15.61895639],
       [18.87834992],
       [23.3071922 ],
       [21.70120173],
       [19.13999943],
       [30.67480326],
       [21.44270298],
       [29.51559288],
       [21.05039889],
       [24.6959341 ],
       [20.71572205],
       [23.95201001],
       [22.84971109],
       [21.20598653],
       [26.77814827],
       [34.11402465],
       [26.05708471],
       [15.44171107],
       [26.18847756],
       [17.27421781],
       [21.26177779],
       [16.8311881 ]])
In [35]:
reg.score(x,y)
Out[35]:
0.9095292365456379
In [36]:
from sklearn.metrics import r2_score
print(r2_score(y_test, y_pred)) 
0.9340266063799337
In [49]:
plt.scatter(X_test, y_test)
plt.plot(X_test, y_pred, color = 'r')
plt.xlabel('Horse Power (HP)')
plt.ylabel('MPG')
plt.title('HP vs. MPG (Testing Set)')
plt.show()
No description has been provided for this image